{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# TiDB Graph Store"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "%pip install llama-index-llms-openai\n",
    "%pip install llama-index-graph-stores-tidb\n",
    "%pip install llama-index-embeddings-openai\n",
    "%pip install llama-index-llms-azure-openai"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# For OpenAI\n",
    "\n",
    "import os\n",
    "\n",
    "os.environ[\"OPENAI_API_KEY\"] = \"sk-xxxxxxx\"\n",
    "\n",
    "import logging\n",
    "import sys\n",
    "from llama_index.llms.openai import OpenAI\n",
    "from llama_index.core import Settings\n",
    "\n",
    "logging.basicConfig(stream=sys.stdout, level=logging.INFO)\n",
    "\n",
    "# define LLM\n",
    "llm = OpenAI(temperature=0, model=\"gpt-3.5-turbo\")\n",
    "Settings.llm = llm\n",
    "Settings.chunk_size = 512"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# For Azure OpenAI\n",
    "import os\n",
    "import openai\n",
    "from llama_index.llms.azure_openai import AzureOpenAI\n",
    "from llama_index.embeddings.openai import OpenAIEmbedding\n",
    "\n",
    "import logging\n",
    "import sys\n",
    "\n",
    "logging.basicConfig(\n",
    "    stream=sys.stdout, level=logging.INFO\n",
    ")  # logging.DEBUG for more verbose output\n",
    "logging.getLogger().addHandler(logging.StreamHandler(stream=sys.stdout))\n",
    "\n",
    "openai.api_type = \"azure\"\n",
    "openai.api_base = \"https://<foo-bar>.openai.azure.com\"\n",
    "openai.api_version = \"2022-12-01\"\n",
    "os.environ[\"OPENAI_API_KEY\"] = \"<your-openai-key>\"\n",
    "openai.api_key = os.getenv(\"OPENAI_API_KEY\")\n",
    "\n",
    "llm = AzureOpenAI(\n",
    "    deployment_name=\"<foo-bar-deployment>\",\n",
    "    temperature=0,\n",
    "    openai_api_version=openai.api_version,\n",
    "    model_kwargs={\n",
    "        \"api_key\": openai.api_key,\n",
    "        \"api_base\": openai.api_base,\n",
    "        \"api_type\": openai.api_type,\n",
    "        \"api_version\": openai.api_version,\n",
    "    },\n",
    ")\n",
    "\n",
    "# You need to deploy your own embedding model as well as your own chat completion model\n",
    "embedding_llm = OpenAIEmbedding(\n",
    "    model=\"text-embedding-ada-002\",\n",
    "    deployment_name=\"<foo-bar-deployment>\",\n",
    "    api_key=openai.api_key,\n",
    "    api_base=openai.api_base,\n",
    "    api_type=openai.api_type,\n",
    "    api_version=openai.api_version,\n",
    ")\n",
    "\n",
    "Settings.llm = llm\n",
    "Settings.embed_model = embedding_llm\n",
    "Settings.chunk_size = 512"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Using Knowledge Graph with TiDB"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Prepare a TiDB cluster\n",
    "\n",
    "- [TiDB Cloud](https://tidb.cloud/) [Recommended], a fully managed TiDB service that frees you from the complexity of database operations.\n",
    "- [TiUP](https://docs.pingcap.com/tidb/stable/tiup-overview), use `tiup playground`` to create a local TiDB cluster for testing."
   ]
  },
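  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "If you just want to experiment locally, a throwaway cluster can be started with TiUP. Note that `tiup playground` keeps running in the foreground, so run it in a separate terminal rather than in this notebook (this assumes TiUP is already installed):\n",
    "\n",
    "```shell\n",
    "tiup playground\n",
    "```"
   ]
  },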
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Get TiDB connection string\n",
    "\n",
    "For example: `mysql+pymysql://user:password@host:4000/dbname`, in TiDBGraphStore we use pymysql as the db driver, so the connection string should be `mysql+pymysql://...`.\n",
    "\n",
    "If you are using a TiDB Cloud serverless cluster with public endpoint, it requires TLS connection, so the connection string should be like `mysql+pymysql://user:password@host:4000/dbname?ssl_verify_cert=true&ssl_verify_identity=true`.\n",
    "\n",
    "Replace `user`, `password`, `host`, `dbname` with your own values."
   ]
  },
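  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "If your password contains special characters, remember to URL-encode it. The cell below is a minimal sketch of assembling the connection string in Python; the credential values are placeholders, and the TLS query parameters are only needed for TiDB Cloud serverless public endpoints."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from urllib.parse import quote_plus\n",
    "\n",
    "# Placeholder credentials -- replace these with your own cluster's values.\n",
    "tidb_user = \"user\"\n",
    "tidb_password = quote_plus(\"password\")  # URL-encode special characters\n",
    "tidb_host = \"host\"\n",
    "tidb_port = 4000\n",
    "tidb_db_name = \"dbname\"\n",
    "\n",
    "tidb_connection_string = (\n",
    "    f\"mysql+pymysql://{tidb_user}:{tidb_password}\"\n",
    "    f\"@{tidb_host}:{tidb_port}/{tidb_db_name}\"\n",
    "    # TLS parameters, required for TiDB Cloud serverless public endpoints\n",
    "    \"?ssl_verify_cert=true&ssl_verify_identity=true\"\n",
    ")\n",
    "print(tidb_connection_string)"
   ]
  },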
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Initialize TiDBGraphStore"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from llama_index.graph_stores.tidb import TiDBGraphStore\n",
    "\n",
    "graph_store = TiDBGraphStore(\n",
    "    db_connection_string=\"mysql+pymysql://user:password@host:4000/dbname\"\n",
    ")"
   ]
  },
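  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "As a quick sanity check that the connection works, you can round-trip a dummy triplet through the generic `GraphStore` interface (`upsert_triplet` / `get` / `delete`). The subject, relation, and object below are made-up values used only for illustration."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Write and read back a throwaway triplet to verify the TiDB connection.\n",
    "graph_store.upsert_triplet(\"foo\", \"is_a\", \"bar\")\n",
    "print(graph_store.get(\"foo\"))  # should include the (\"is_a\", \"bar\") relation\n",
    "\n",
    "# Clean up the test triplet afterwards.\n",
    "graph_store.delete(\"foo\", \"is_a\", \"bar\")"
   ]
  },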
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Instantiate TiDB KG Indexes"
   ]
  },
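  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "The next cell reads the Paul Graham essay that ships with the llama_index repository. If you are running this notebook outside a repository checkout, you can fetch a copy first and point `SimpleDirectoryReader` at the downloaded directory instead; the URL and local path below are assumptions, so adjust them to your setup."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "import urllib.request\n",
    "\n",
    "# Optional: download the sample essay if you don't have the repo data locally.\n",
    "# The URL points at the copy in the public llama_index repository (an assumption);\n",
    "# skip this cell if you already have the data.\n",
    "data_dir = \"data/paul_graham\"\n",
    "os.makedirs(data_dir, exist_ok=True)\n",
    "url = (\n",
    "    \"https://raw.githubusercontent.com/run-llama/llama_index/main/\"\n",
    "    \"docs/docs/examples/data/paul_graham/paul_graham_essay.txt\"\n",
    ")\n",
    "urllib.request.urlretrieve(url, os.path.join(data_dir, \"paul_graham_essay.txt\"))"
   ]
  },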
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from llama_index.core import (\n",
    "    KnowledgeGraphIndex,\n",
    "    SimpleDirectoryReader,\n",
    "    StorageContext,\n",
    ")\n",
    "\n",
    "documents = SimpleDirectoryReader(\n",
    "    \"../../../examples/data/paul_graham/\"\n",
    ").load_data()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "storage_context = StorageContext.from_defaults(graph_store=graph_store)\n",
    "\n",
    "# NOTE: can take a while!\n",
    "index = KnowledgeGraphIndex.from_documents(\n",
    "    documents=documents,\n",
    "    storage_context=storage_context,\n",
    "    max_triplets_per_chunk=2,\n",
    ")"
   ]
  },
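  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "The extracted triplets are persisted in TiDB, so you can inspect them directly through the graph store. Below is a small sketch using `get_rel_map` from the generic `GraphStore` interface; \"Interleaf\" is one entity mentioned in the essay, so adjust the subject if your extraction produced different entity names."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Fetch the stored relations (up to depth 2) for one of the extracted entities.\n",
    "rel_map = graph_store.get_rel_map([\"Interleaf\"], depth=2)\n",
    "print(rel_map)"
   ]
  },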
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Querying the Knowledge Graph"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n",
      "WARNING:llama_index.core.indices.knowledge_graph.retrievers:Index was not constructed with embeddings, skipping embedding usage...\n",
      "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n"
     ]
    }
   ],
   "source": [
    "query_engine = index.as_query_engine(\n",
    "    include_text=False, response_mode=\"tree_summarize\"\n",
    ")\n",
    "response = query_engine.query(\n",
    "    \"Tell me more about Interleaf\",\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/markdown": [
       "<b>Interleaf was a software company that developed a scripting language and was known for its software products. It was inspired by Emacs and faced challenges due to Moore's law. Over time, Interleaf's prominence declined.</b>"
      ],
      "text/plain": [
       "<IPython.core.display.Markdown object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "from IPython.display import Markdown, display\n",
    "\n",
    "display(Markdown(f\"<b>{response}</b>\"))"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": ".venv",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
